# -*- coding: utf-8 -*-
"""
Created on Fri Sep  8 16:11:20 2023
####
@author: yys
"""


## First put the raw files into one folder, batch-convert them to ASCII, then run this program.
import re
import os
import pandas as pd
from datetime import datetime
import numpy as np
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Resampling window.  Pressure, temperature, relative humidity, rain rate and
# wind are averaged arithmetically over each window.  To change the time step
# switch to one of the alternatives below (e.g. 30 minutes or 1 hour).
# The value is also embedded in the output directory and file name.
# NOTE: pandas >= 2.2 deprecates the 'T' / 'H' aliases in favour of
# 'min' / 'h'; changing the alias here also changes the output path.
tt='5T'
# tt='30T'
# tt='1H'

# Folder that contains the converted .ASC files.
# Alternative data set (monthly sub-folders):
# folder_path = "J:\\NJ MWR\\SZLS\\Y2023\\M06！\\met\\"
# namemonth = re.search(r'\\M(\d{2})', folder_path).group(1)
folder_path = "J:\\BJ MWR\\Y2019\\met\\"

# Derive the year and the data-set name from the path components instead of
# fixed character offsets, so that a different station prefix or an extra
# directory level does not silently yield wrong names.
_year_match = re.search(r'\\Y(\d{4})\\', folder_path)
if _year_match is None:
    raise ValueError('folder_path has no "Y<yyyy>" component: ' + folder_path)
nameyear = _year_match.group(1)
# Last non-empty path component, e.g. "met".
namelist = folder_path.rstrip('\\').rsplit('\\', 1)[-1]

# Column names assigned to every .ASC file, in file order.
# "RR(mm/h)" is the rain rate.
ariables = ["year","month","day","hour","minute","second","rain flag","pressure(hPa)", "temperature(K)","humidity(%)",
            "wind_speed(km/h)","wind_direction(°)", "RR(mm/h)"]

# The first seven columns (date/time fields and the rain flag) are stored as
# int16, the remaining measurement columns as float32.
data_types = {col: 'int16' for col in ariables[:7]}
data_types.update({col: 'float32' for col in ariables[7:]})

# Names of all .ASC files in the folder.  os.listdir() returns entries in
# arbitrary order, so sort them to make the run deterministic.
csv_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.ASC'))
if not csv_files:
    # Fail here with a clear message instead of a KeyError further down.
    raise FileNotFoundError('No .ASC files found in ' + folder_path)

# Read every file and collect the frames; concatenating once at the end
# avoids re-copying the accumulated data on every iteration.
frames = []
for csv_file in csv_files:
    file_path = os.path.join(folder_path, csv_file)
    print('读取'+csv_file)
    # The first 18 lines of each file are skipped (presumably a header
    # block -- confirm against the instrument's file format).
    current_data = pd.read_csv(file_path, sep=',', encoding='gbk', skiprows=18, header=None)
    current_data.columns = ariables
    current_data = current_data.astype(data_types)
    frames.append(current_data)

# One merged table with a unique running index (each file's own index
# restarts at 0, which would otherwise leave duplicate labels).
data = pd.concat(frames, ignore_index=True)
 
 


print('计算中')

# Discard abnormal records: keep only the rows whose two-digit year matches
# the year of the folder being processed.  Filtering the rows directly (rather
# than overwriting them with NaN and dropping them afterwards) keeps the
# integer columns integer.
expected_year = int(nameyear) % 100
data = data.loc[data['year'] == expected_year].copy()

# Calm records (speed 0 and direction 0) carry no direction information;
# blank their direction so they do not bias the mean wind direction.
calm = (data['wind_speed(km/h)'] == 0) & (data['wind_direction(°)'] == 0)
data['wind_direction(°)'] = data['wind_direction(°)'].mask(calm)

# Convert the wind direction from degrees to radians.
data['wind_direction_rad'] = np.radians(data['wind_direction(°)'])
# Decompose the wind into x and y components for vector averaging.
data['wind_speed_x(km/h)'] = data['wind_speed(km/h)'] * np.cos(data['wind_direction_rad'])
data['wind_speed_y(km/h)'] = data['wind_speed(km/h)'] * np.sin(data['wind_direction_rad'])

# Build the timestamp from the numeric date/time columns.  pd.to_datetime
# assembles a datetime from columns named year/month/day/hour/minute/second,
# which avoids a slow row-by-row string join.  The files store a two-digit
# year; expand it with the same pivot as the '%y' format (00-68 -> 20xx,
# 69-99 -> 19xx).
stamp_parts = data[['year', 'month', 'day', 'hour', 'minute', 'second']].astype('int64')
stamp_parts['year'] = stamp_parts['year'] + np.where(stamp_parts['year'] < 69, 2000, 1900)
data['Timestamp'] = pd.to_datetime(stamp_parts).to_numpy()

# Use the timestamp as index so the data can be resampled.
data.set_index('Timestamp', inplace=True)

# Arithmetic mean of the measurements (and of the wind components) per window.
data1 = data[["pressure(hPa)", "temperature(K)", "humidity(%)", "RR(mm/h)","wind_speed(km/h)",'wind_speed_x(km/h)','wind_speed_y(km/h)']].resample(tt).mean()
# Maximum of the rain flag per window, i.e. whether any record was flagged.
data2 = data[["rain flag"]].resample(tt).max()
# Mean wind direction from the averaged vector components, in degrees.
avg_direction = np.degrees(np.arctan2(data1['wind_speed_y(km/h)'], data1['wind_speed_x(km/h)']))
# arctan2 yields (-180, 180]; shift negative angles into [0, 360).
# NaN (windows without any direction) stays NaN.
data1['avg_wind_direction(°)'] = avg_direction.where(avg_direction >= 0, avg_direction + 360)
combined_data = pd.concat([data1, data2], axis=1)



# Turn the Timestamp index back into an ordinary column.
combined_data.reset_index(inplace=True)

# Write the combined result to <root>\<namelist>\<tt>\<year>_<namelist>_<tt>.csv.
# Create the target directory first: to_csv() does not create missing
# directories and would otherwise fail on the first run for a new time step.
output_dir = 'J:\\BJ MWR\\'+namelist+'\\'+tt
os.makedirs(output_dir, exist_ok=True)
combined_data_path = output_dir+'\\'+nameyear+'_'+namelist+'_'+tt+'.csv'

combined_data.to_csv(combined_data_path, index=False,encoding='utf-8')






        